# Read in the data ----
library(data.table)

# Folder holding the two daily PM2.5 CSV exports. Kept in one place so the
# analysis can be pointed at another location by editing a single line.
data_dir <- "C:/Users/ellya/OneDrive/Desktop/PM566labs"

# 2002 daily PM2.5 observations.
two <- data.table::fread(
  file.path(data_dir, "2002_2.5PM_data.csv"),
  header = TRUE, sep = ","
)

# 2022 daily PM2.5 observations.
ttwo <- data.table::fread(
  file.path(data_dir, "2022_2.5PM_data.csv"),
  header = TRUE, sep = ","
)

# Check the dimensions, headers, and footers ----
dim(two)
[1] 15976 22
# Row and column counts of the 2022 table.
dim(ttwo)
[1] 59756 22
# First rows of the 2002 table.
head(two)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 01/05/2002 AQS 60010007 1 25.1 ug/m3 LC
2: 01/06/2002 AQS 60010007 1 31.6 ug/m3 LC
3: 01/08/2002 AQS 60010007 1 21.4 ug/m3 LC
4: 01/11/2002 AQS 60010007 1 25.9 ug/m3 LC
5: 01/14/2002 AQS 60010007 1 34.5 ug/m3 LC
6: 01/17/2002 AQS 60010007 1 41.0 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 81 Livermore 1 100
2: 93 Livermore 1 100
3: 74 Livermore 1 100
4: 82 Livermore 1 100
5: 98 Livermore 1 100
6: 115 Livermore 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 120
2: 88101 PM2.5 - Local Conditions 120
3: 88101 PM2.5 - Local Conditions 120
4: 88101 PM2.5 - Local Conditions 120
5: 88101 PM2.5 - Local Conditions 120
6: 88101 PM2.5 - Local Conditions 120
Method Description CBSA Code
<char> <int>
1: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
2: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
3: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
4: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
5: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
6: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
CBSA Name State FIPS Code State
<char> <int> <char>
1: San Francisco-Oakland-Hayward, CA 6 California
2: San Francisco-Oakland-Hayward, CA 6 California
3: San Francisco-Oakland-Hayward, CA 6 California
4: San Francisco-Oakland-Hayward, CA 6 California
5: San Francisco-Oakland-Hayward, CA 6 California
6: San Francisco-Oakland-Hayward, CA 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 1 Alameda 37.68753 -121.7842
2: 1 Alameda 37.68753 -121.7842
3: 1 Alameda 37.68753 -121.7842
4: 1 Alameda 37.68753 -121.7842
5: 1 Alameda 37.68753 -121.7842
6: 1 Alameda 37.68753 -121.7842
# Last rows of the 2002 table.
tail(two)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 12/10/2002 AQS 61131003 1 15 ug/m3 LC
2: 12/13/2002 AQS 61131003 1 15 ug/m3 LC
3: 12/22/2002 AQS 61131003 1 1 ug/m3 LC
4: 12/25/2002 AQS 61131003 1 23 ug/m3 LC
5: 12/28/2002 AQS 61131003 1 5 ug/m3 LC
6: 12/31/2002 AQS 61131003 1 6 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 62 Woodland-Gibson Road 1 100
2: 62 Woodland-Gibson Road 1 100
3: 6 Woodland-Gibson Road 1 100
4: 77 Woodland-Gibson Road 1 100
5: 28 Woodland-Gibson Road 1 100
6: 33 Woodland-Gibson Road 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 117
2: 88101 PM2.5 - Local Conditions 117
3: 88101 PM2.5 - Local Conditions 117
4: 88101 PM2.5 - Local Conditions 117
5: 88101 PM2.5 - Local Conditions 117
6: 88101 PM2.5 - Local Conditions 117
Method Description CBSA Code
<char> <int>
1: R & P Model 2000 PM2.5 Sampler w/WINS 40900
2: R & P Model 2000 PM2.5 Sampler w/WINS 40900
3: R & P Model 2000 PM2.5 Sampler w/WINS 40900
4: R & P Model 2000 PM2.5 Sampler w/WINS 40900
5: R & P Model 2000 PM2.5 Sampler w/WINS 40900
6: R & P Model 2000 PM2.5 Sampler w/WINS 40900
CBSA Name State FIPS Code State
<char> <int> <char>
1: Sacramento--Roseville--Arden-Arcade, CA 6 California
2: Sacramento--Roseville--Arden-Arcade, CA 6 California
3: Sacramento--Roseville--Arden-Arcade, CA 6 California
4: Sacramento--Roseville--Arden-Arcade, CA 6 California
5: Sacramento--Roseville--Arden-Arcade, CA 6 California
6: Sacramento--Roseville--Arden-Arcade, CA 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 113 Yolo 38.66121 -121.7327
2: 113 Yolo 38.66121 -121.7327
3: 113 Yolo 38.66121 -121.7327
4: 113 Yolo 38.66121 -121.7327
5: 113 Yolo 38.66121 -121.7327
6: 113 Yolo 38.66121 -121.7327
# First rows of the 2022 table.
head(ttwo)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 01/01/2022 AQS 60010007 3 12.7 ug/m3 LC
2: 01/02/2022 AQS 60010007 3 13.9 ug/m3 LC
3: 01/03/2022 AQS 60010007 3 7.1 ug/m3 LC
4: 01/04/2022 AQS 60010007 3 3.7 ug/m3 LC
5: 01/05/2022 AQS 60010007 3 4.2 ug/m3 LC
6: 01/06/2022 AQS 60010007 3 3.8 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 58 Livermore 1 100
2: 60 Livermore 1 100
3: 39 Livermore 1 100
4: 21 Livermore 1 100
5: 23 Livermore 1 100
6: 21 Livermore 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 170
2: 88101 PM2.5 - Local Conditions 170
3: 88101 PM2.5 - Local Conditions 170
4: 88101 PM2.5 - Local Conditions 170
5: 88101 PM2.5 - Local Conditions 170
6: 88101 PM2.5 - Local Conditions 170
Method Description CBSA Code
<char> <int>
1: Met One BAM-1020 Mass Monitor w/VSCC 41860
2: Met One BAM-1020 Mass Monitor w/VSCC 41860
3: Met One BAM-1020 Mass Monitor w/VSCC 41860
4: Met One BAM-1020 Mass Monitor w/VSCC 41860
5: Met One BAM-1020 Mass Monitor w/VSCC 41860
6: Met One BAM-1020 Mass Monitor w/VSCC 41860
CBSA Name State FIPS Code State
<char> <int> <char>
1: San Francisco-Oakland-Hayward, CA 6 California
2: San Francisco-Oakland-Hayward, CA 6 California
3: San Francisco-Oakland-Hayward, CA 6 California
4: San Francisco-Oakland-Hayward, CA 6 California
5: San Francisco-Oakland-Hayward, CA 6 California
6: San Francisco-Oakland-Hayward, CA 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 1 Alameda 37.68753 -121.7842
2: 1 Alameda 37.68753 -121.7842
3: 1 Alameda 37.68753 -121.7842
4: 1 Alameda 37.68753 -121.7842
5: 1 Alameda 37.68753 -121.7842
6: 1 Alameda 37.68753 -121.7842
# Last rows of the 2022 table.
tail(ttwo)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 12/01/2022 AQS 61131003 1 3.4 ug/m3 LC
2: 12/07/2022 AQS 61131003 1 3.8 ug/m3 LC
3: 12/13/2022 AQS 61131003 1 6.0 ug/m3 LC
4: 12/19/2022 AQS 61131003 1 34.8 ug/m3 LC
5: 12/25/2022 AQS 61131003 1 23.2 ug/m3 LC
6: 12/31/2022 AQS 61131003 1 1.0 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 19 Woodland-Gibson Road 1 100
2: 21 Woodland-Gibson Road 1 100
3: 33 Woodland-Gibson Road 1 100
4: 99 Woodland-Gibson Road 1 100
5: 77 Woodland-Gibson Road 1 100
6: 6 Woodland-Gibson Road 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 145
2: 88101 PM2.5 - Local Conditions 145
3: 88101 PM2.5 - Local Conditions 145
4: 88101 PM2.5 - Local Conditions 145
5: 88101 PM2.5 - Local Conditions 145
6: 88101 PM2.5 - Local Conditions 145
Method Description CBSA Code
<char> <int>
1: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
2: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
3: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
4: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
5: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
6: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
CBSA Name State FIPS Code State
<char> <int> <char>
1: Sacramento--Roseville--Arden-Arcade, CA 6 California
2: Sacramento--Roseville--Arden-Arcade, CA 6 California
3: Sacramento--Roseville--Arden-Arcade, CA 6 California
4: Sacramento--Roseville--Arden-Arcade, CA 6 California
5: Sacramento--Roseville--Arden-Arcade, CA 6 California
6: Sacramento--Roseville--Arden-Arcade, CA 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 113 Yolo 38.66121 -121.7327
2: 113 Yolo 38.66121 -121.7327
3: 113 Yolo 38.66121 -121.7327
4: 113 Yolo 38.66121 -121.7327
5: 113 Yolo 38.66121 -121.7327
6: 113 Yolo 38.66121 -121.7327
# Quick look at the variables ----
# Column names and storage types of the 2002 table.
str(two)
Classes 'data.table' and 'data.frame': 15976 obs. of 22 variables:
$ Date : chr "01/05/2002" "01/06/2002" "01/08/2002" "01/11/2002" ...
$ Source : chr "AQS" "AQS" "AQS" "AQS" ...
$ Site ID : int 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
$ POC : int 1 1 1 1 1 1 1 1 1 1 ...
$ Daily Mean PM2.5 Concentration: num 25.1 31.6 21.4 25.9 34.5 41 29.3 15 18.8 37.9 ...
$ Units : chr "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
$ Daily AQI Value : int 81 93 74 82 98 115 89 62 69 107 ...
$ Local Site Name : chr "Livermore" "Livermore" "Livermore" "Livermore" ...
$ Daily Obs Count : int 1 1 1 1 1 1 1 1 1 1 ...
$ Percent Complete : num 100 100 100 100 100 100 100 100 100 100 ...
$ AQS Parameter Code : int 88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
$ AQS Parameter Description : chr "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
$ Method Code : int 120 120 120 120 120 120 120 120 120 120 ...
$ Method Description : chr "Andersen RAAS2.5-300 PM2.5 SEQ w/WINS" "Andersen RAAS2.5-300 PM2.5 SEQ w/WINS" "Andersen RAAS2.5-300 PM2.5 SEQ w/WINS" "Andersen RAAS2.5-300 PM2.5 SEQ w/WINS" ...
$ CBSA Code : int 41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
$ CBSA Name : chr "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
$ State FIPS Code : int 6 6 6 6 6 6 6 6 6 6 ...
$ State : chr "California" "California" "California" "California" ...
$ County FIPS Code : int 1 1 1 1 1 1 1 1 1 1 ...
$ County : chr "Alameda" "Alameda" "Alameda" "Alameda" ...
$ Site Latitude : num 37.7 37.7 37.7 37.7 37.7 ...
$ Site Longitude : num -122 -122 -122 -122 -122 ...
- attr(*, ".internal.selfref")=<externalptr>
# Column names and storage types of the 2022 table.
str(ttwo)
Classes 'data.table' and 'data.frame': 59756 obs. of 22 variables:
$ Date : chr "01/01/2022" "01/02/2022" "01/03/2022" "01/04/2022" ...
$ Source : chr "AQS" "AQS" "AQS" "AQS" ...
$ Site ID : int 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
$ POC : int 3 3 3 3 3 3 3 3 3 3 ...
$ Daily Mean PM2.5 Concentration: num 12.7 13.9 7.1 3.7 4.2 3.8 2.3 6.9 13.6 11.2 ...
$ Units : chr "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
$ Daily AQI Value : int 58 60 39 21 23 21 13 38 59 55 ...
$ Local Site Name : chr "Livermore" "Livermore" "Livermore" "Livermore" ...
$ Daily Obs Count : int 1 1 1 1 1 1 1 1 1 1 ...
$ Percent Complete : num 100 100 100 100 100 100 100 100 100 100 ...
$ AQS Parameter Code : int 88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
$ AQS Parameter Description : chr "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
$ Method Code : int 170 170 170 170 170 170 170 170 170 170 ...
$ Method Description : chr "Met One BAM-1020 Mass Monitor w/VSCC" "Met One BAM-1020 Mass Monitor w/VSCC" "Met One BAM-1020 Mass Monitor w/VSCC" "Met One BAM-1020 Mass Monitor w/VSCC" ...
$ CBSA Code : int 41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
$ CBSA Name : chr "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
$ State FIPS Code : int 6 6 6 6 6 6 6 6 6 6 ...
$ State : chr "California" "California" "California" "California" ...
$ County FIPS Code : int 1 1 1 1 1 1 1 1 1 1 ...
$ County : chr "Alameda" "Alameda" "Alameda" "Alameda" ...
$ Site Latitude : num 37.7 37.7 37.7 37.7 37.7 ...
$ Site Longitude : num -122 -122 -122 -122 -122 ...
- attr(*, ".internal.selfref")=<externalptr>
# Closer look at key variables ----
# Five-number summary plus mean of the 2002 daily mean concentration.
summary(two$`Daily Mean PM2.5 Concentration`)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 7.00 12.00 16.12 20.50 104.30
# Lowest 2002 readings: sort ascending by daily mean, then show the top rows.
two[order(`Daily Mean PM2.5 Concentration`)] |>
  head()
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 02/19/2002 AQS 60150002 1 0.0 ug/m3 LC
2: 07/07/2002 AQS 60450006 1 0.0 ug/m3 LC
3: 02/19/2002 AQS 60893003 1 0.0 ug/m3 LC
4: 03/06/2002 AQS 60179000 1 0.1 ug/m3 LC
5: 12/16/2002 AQS 60199000 1 0.1 ug/m3 LC
6: 12/16/2002 AQS 60519000 1 0.1 ug/m3 LC
Daily AQI Value Local Site Name
<int> <char>
1: 0 Redwood NP
2: 0 Ukiah-Library
3: 0 Lassen Volcanic NP - Manzanita Lake Fire Station
4: 1 Bliss SP
5: 1 Kaiser Wilderness
6: 1 Hoover Wilderness
Daily Obs Count Percent Complete AQS Parameter Code
<int> <num> <int>
1: 1 100 88502
2: 1 100 88101
3: 1 100 88502
4: 1 100 88502
5: 1 100 88502
6: 1 100 88502
AQS Parameter Description Method Code
<char> <int>
1: Acceptable PM2.5 AQI & Speciation Mass 707
2: PM2.5 - Local Conditions 117
3: Acceptable PM2.5 AQI & Speciation Mass 707
4: Acceptable PM2.5 AQI & Speciation Mass 707
5: Acceptable PM2.5 AQI & Speciation Mass 707
6: Acceptable PM2.5 AQI & Speciation Mass 707
Method Description CBSA Code
<char> <int>
1: IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm. 18860
2: R & P Model 2000 PM2.5 Sampler w/WINS 46380
3: IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm. 39820
4: IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm. 40900
5: IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm. 23420
6: IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm. NA
CBSA Name State FIPS Code State
<char> <int> <char>
1: Crescent City, CA 6 California
2: Ukiah, CA 6 California
3: Redding, CA 6 California
4: Sacramento--Roseville--Arden-Arcade, CA 6 California
5: Fresno, CA 6 California
6: 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 15 Del Norte 41.56095 -124.0840
2: 45 Mendocino 39.15047 -123.2065
3: 89 Shasta 40.53999 -121.5765
4: 17 El Dorado 38.97600 -120.1035
5: 19 Fresno 37.22064 -119.1556
6: 51 Mono 38.08802 -119.1781
# Highest 2002 readings: sort ascending by daily mean, then show the last rows.
two[order(`Daily Mean PM2.5 Concentration`)] |>
  tail()
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 11/30/2002 AQS 60674001 1 91.0 ug/m3 LC
2: 02/05/2002 AQS 60290014 4 91.7 ug/m3 LC
3: 11/28/2002 AQS 60190008 5 92.5 ug/m3 LC
4: 02/04/2002 AQS 60290014 4 93.9 ug/m3 LC
5: 11/29/2002 AQS 60290014 3 102.7 ug/m3 LC
6: 11/29/2002 AQS 60290014 4 104.3 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count
<int> <char> <int>
1: 176 Sacramento Health Department-Stockton Blvd. 1
2: 176 Bakersfield-California 1
3: 177 3425 N FIRST ST, FRESNO 1
4: 178 Bakersfield-California 1
5: 184 Bakersfield-California 1
6: 185 Bakersfield-California 1
Percent Complete AQS Parameter Code AQS Parameter Description
<num> <int> <char>
1: 100 88101 PM2.5 - Local Conditions
2: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
3: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
4: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
5: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
6: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
Method Code Method Description CBSA Code
<int> <char> <int>
1: 120 Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 40900
2: 731 Met-One BAM-1020 W/PM2.5 SCC 12540
3: 810 Met One SASS/SuperSASS Teflon 23420
4: 731 Met-One BAM-1020 W/PM2.5 SCC 12540
5: 731 Met-One BAM-1020 W/PM2.5 SCC 12540
6: 731 Met-One BAM-1020 W/PM2.5 SCC 12540
CBSA Name State FIPS Code State
<char> <int> <char>
1: Sacramento--Roseville--Arden-Arcade, CA 6 California
2: Bakersfield, CA 6 California
3: Fresno, CA 6 California
4: Bakersfield, CA 6 California
5: Bakersfield, CA 6 California
6: Bakersfield, CA 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 67 Sacramento 38.55633 -121.4585
2: 29 Kern 35.35661 -119.0626
3: 19 Fresno 36.78133 -119.7732
4: 29 Kern 35.35661 -119.0626
5: 29 Kern 35.35661 -119.0626
6: 29 Kern 35.35661 -119.0626
# Keep only the 2022 rows whose daily mean is strictly positive.
# Note: `> 0` drops exact-zero readings as well as negative ones.
ttwo1 <- ttwo[ttwo$`Daily Mean PM2.5 Concentration` > 0, ]

# Summary of the concentration after filtering.
summary(ttwo1$`Daily Mean PM2.5 Concentration`)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.10 4.10 6.90 8.48 10.70 302.50
# Lowest filtered 2022 readings: sort ascending by daily mean, show top rows.
ttwo1[order(`Daily Mean PM2.5 Concentration`)] |>
  head()
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 12/31/2022 AQS 60074001 3 0.1 ug/m3 LC
2: 07/07/2022 AQS 60150007 1 0.1 ug/m3 LC
3: 07/28/2022 AQS 60150007 1 0.1 ug/m3 LC
4: 11/08/2022 AQS 60192008 3 0.1 ug/m3 LC
5: 12/11/2022 AQS 60192009 3 0.1 ug/m3 LC
6: 12/10/2022 AQS 60199000 1 0.1 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count
<int> <char> <int>
1: 1 TRAFFIC, RURAL PAVED ROAD 1
2: 1 Crescent City-Crescent Elk School 1
3: 1 Crescent City-Crescent Elk School 1
4: 1 Huron 1
5: 1 Tranquillity 1
6: 1 Kaiser Wilderness 1
Percent Complete AQS Parameter Code AQS Parameter Description
<num> <int> <char>
1: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
2: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
3: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
4: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
5: 100 88101 PM2.5 - Local Conditions
6: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
Method Code Method Description
<int> <char>
1: 731 Met-One BAM-1020 W/PM2.5 SCC
2: 731 Met-One BAM-1020 W/PM2.5 SCC
3: 731 Met-One BAM-1020 W/PM2.5 SCC
4: 731 Met-One BAM-1020 W/PM2.5 SCC
5: 170 Met One BAM-1020 Mass Monitor w/VSCC
6: 707 IMPROVE Module A with Cyclone Inlet-Teflon Filter, 2.2 sq. cm.
CBSA Code CBSA Name State FIPS Code State County FIPS Code
<int> <char> <int> <char> <int>
1: 17020 Chico, CA 6 California 7
2: 18860 Crescent City, CA 6 California 15
3: 18860 Crescent City, CA 6 California 15
4: 23420 Fresno, CA 6 California 19
5: 23420 Fresno, CA 6 California 19
6: 23420 Fresno, CA 6 California 19
County Site Latitude Site Longitude
<char> <num> <num>
1: Butte 39.32756 -121.6688
2: Del Norte 41.75613 -124.2035
3: Del Norte 41.75613 -124.2035
4: Fresno 36.19867 -120.1011
5: Fresno 36.63423 -120.3823
6: Fresno 37.22064 -119.1556
# Highest filtered 2022 readings: sort ascending by daily mean, show last rows.
ttwo1[order(`Daily Mean PM2.5 Concentration`)] |>
  tail()
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 09/10/2022 AQS 60570005 3 218.2 ug/m3 LC
2: 09/10/2022 AQS 60610004 3 243.9 ug/m3 LC
3: 08/15/2022 AQS 61050002 1 244.7 ug/m3 LC
4: 08/14/2022 AQS 61050002 1 246.2 ug/m3 LC
5: 09/16/2022 AQS 60611004 3 296.3 ug/m3 LC
6: 07/31/2022 AQS 60932001 3 302.5 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count
<int> <char> <int>
1: 293 Grass Valley-Litton Building 1
2: 338 Colfax-City Hall 1
3: 339 Weaverville-Courthouse 1
4: 342 Weaverville-Courthouse 1
5: 442 Tahoe City-Fairway Drive 1
6: 454 Yreka 1
Percent Complete AQS Parameter Code AQS Parameter Description
<num> <int> <char>
1: 100 88101 PM2.5 - Local Conditions
2: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
3: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
4: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
5: 100 88502 Acceptable PM2.5 AQI & Speciation Mass
6: 100 88101 PM2.5 - Local Conditions
Method Code Method Description CBSA Code
<int> <char> <int>
1: 209 Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM2.5C 46020
2: 731 Met-One BAM-1020 W/PM2.5 SCC 40900
3: 731 Met-One BAM-1020 W/PM2.5 SCC NA
4: 731 Met-One BAM-1020 W/PM2.5 SCC NA
5: 731 Met-One BAM-1020 W/PM2.5 SCC 40900
6: 170 Met One BAM-1020 Mass Monitor w/VSCC NA
CBSA Name State FIPS Code State
<char> <int> <char>
1: Truckee-Grass Valley, CA 6 California
2: Sacramento--Roseville--Arden-Arcade, CA 6 California
3: 6 California
4: 6 California
5: Sacramento--Roseville--Arden-Arcade, CA 6 California
6: 6 California
County FIPS Code County Site Latitude Site Longitude
<int> <char> <num> <num>
1: 57 Nevada 39.23348 -121.0556
2: 61 Placer 39.10017 -120.9538
3: 105 Trinity 40.73475 -122.9412
4: 105 Trinity 40.73475 -122.9412
5: 61 Placer 39.16602 -120.1488
6: 93 Siskiyou 41.72689 -122.6336
# Row and column counts of the unfiltered 2022 table (`ttwo`, not `ttwo1`).
dim(ttwo)
[1] 59756 22
When looking more closely at the Daily Mean PM2.5 Concentration variable in both data sets, I noticed that the 2022 data set had PM2.5 values less than 0, which shouldn’t be possible. I removed these observations from the data; the filter keeps only 2022 readings strictly greater than 0. Additionally, both data sets have maximum PM2.5 values that are much higher than the 3rd quartile values. However, I could not find anything online that would prompt me to remove these high observations. There are 15976 observations for the 2002 data and the mean Daily Mean PM2.5 Concentration is 16.12. For the 2022 data, after applying this filter, there are 59413 observations and the mean Daily Mean PM2.5 Concentration is 8.48. As the means are higher than the medians for both data sets, the data seem to be skewed right. Furthermore, Date is stored as a character string rather than as a date.
Data Merging
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':
between, first, last
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Tag each table with its year, then stack them into one table.
two[, Year := 2002]
ttwo1[, Year := 2022]
total <- rbind(two, ttwo1)

# Date is still a "mm/dd/YYYY" string at this point, and sorting those strings
# orders rows by month before year. Parse it for the sort key so rows come out
# in chronological order; the Date column itself is left unchanged.
head(total[order(as.Date(Date, format = "%m/%d/%Y"))])
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 01/01/2002 AQS 60074001 3 10.6 ug/m3 LC
2: 01/01/2002 AQS 60130002 1 20.9 ug/m3 LC
3: 01/01/2002 AQS 60290014 1 26.1 ug/m3 LC
4: 01/01/2002 AQS 60290014 3 30.3 ug/m3 LC
5: 01/01/2002 AQS 60290014 4 31.1 ug/m3 LC
6: 01/01/2002 AQS 60370002 1 32.3 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 54 TRAFFIC, RURAL PAVED ROAD 1 100
2: 73 Concord 1 100
3: 83 Bakersfield-California 1 100
4: 90 Bakersfield-California 1 100
5: 92 Bakersfield-California 1 100
6: 94 Azusa 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
2: 88101 PM2.5 - Local Conditions 120
3: 88101 PM2.5 - Local Conditions 120
4: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
5: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
6: 88101 PM2.5 - Local Conditions 120
Method Description CBSA Code
<char> <int>
1: Met-One BAM-1020 W/PM2.5 SCC 17020
2: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 41860
3: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 12540
4: Met-One BAM-1020 W/PM2.5 SCC 12540
5: Met-One BAM-1020 W/PM2.5 SCC 12540
6: Andersen RAAS2.5-300 PM2.5 SEQ w/WINS 31080
CBSA Name State FIPS Code State
<char> <int> <char>
1: Chico, CA 6 California
2: San Francisco-Oakland-Hayward, CA 6 California
3: Bakersfield, CA 6 California
4: Bakersfield, CA 6 California
5: Bakersfield, CA 6 California
6: Los Angeles-Long Beach-Anaheim, CA 6 California
County FIPS Code County Site Latitude Site Longitude Year
<int> <char> <num> <num> <num>
1: 7 Butte 39.32756 -121.6688 2002
2: 13 Contra Costa 37.93601 -122.0262 2002
3: 29 Kern 35.35661 -119.0626 2002
4: 29 Kern 35.35661 -119.0626 2002
5: 29 Kern 35.35661 -119.0626 2002
6: 37 Los Angeles 34.13650 -117.9239 2002
# Latest rows of the merged table. Date is a "mm/dd/YYYY" string, so it is
# parsed for the sort key; ordering the raw strings would sort by month first
# rather than chronologically.
tail(total[order(as.Date(Date, format = "%m/%d/%Y"))])
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 12/31/2022 AQS 61111004 3 0.1 ug/m3 LC
2: 12/31/2022 AQS 61112002 3 1.0 ug/m3 LC
3: 12/31/2022 AQS 61112002 4 1.5 ug/m3 LC
4: 12/31/2022 AQS 61113001 3 0.7 ug/m3 LC
5: 12/31/2022 AQS 61130004 3 2.0 ug/m3 LC
6: 12/31/2022 AQS 61131003 1 1.0 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 1 Ojai - East Ojai Ave 1 100
2: 6 Simi Valley-Cochran Street 1 100
3: 8 Simi Valley-Cochran Street 1 100
4: 4 El Rio-Rio Mesa School #2 1 100
5: 11 Davis-UCD Campus 1 100
6: 6 Woodland-Gibson Road 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 170
2: 88101 PM2.5 - Local Conditions 170
3: 88101 PM2.5 - Local Conditions 170
4: 88101 PM2.5 - Local Conditions 170
5: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
6: 88101 PM2.5 - Local Conditions 145
Method Description CBSA Code
<char> <int>
1: Met One BAM-1020 Mass Monitor w/VSCC 37100
2: Met One BAM-1020 Mass Monitor w/VSCC 37100
3: Met One BAM-1020 Mass Monitor w/VSCC 37100
4: Met One BAM-1020 Mass Monitor w/VSCC 37100
5: Met-One BAM-1020 W/PM2.5 SCC 40900
6: R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC 40900
CBSA Name State FIPS Code State
<char> <int> <char>
1: Oxnard-Thousand Oaks-Ventura, CA 6 California
2: Oxnard-Thousand Oaks-Ventura, CA 6 California
3: Oxnard-Thousand Oaks-Ventura, CA 6 California
4: Oxnard-Thousand Oaks-Ventura, CA 6 California
5: Sacramento--Roseville--Arden-Arcade, CA 6 California
6: Sacramento--Roseville--Arden-Arcade, CA 6 California
County FIPS Code County Site Latitude Site Longitude Year
<int> <char> <num> <num> <num>
1: 111 Ventura 34.44806 -119.2313 2022
2: 111 Ventura 34.27632 -118.6837 2022
3: 111 Ventura 34.27632 -118.6837 2022
4: 111 Ventura 34.25239 -119.1432 2022
5: 113 Yolo 38.53445 -121.7734 2022
6: 113 Yolo 38.66121 -121.7327 2022
# Give the concentration and coordinate columns short names.
total <- rename(
  total,
  PM25 = "Daily Mean PM2.5 Concentration",
  slat = `Site Latitude`,
  slon = `Site Longitude`
)
There are many more monitoring sites in 2022 compared to 2002. Whereas the 2002 monitoring sites were very distant from one another and sparsely covered California, the 2022 monitoring sites span California’s coastal and valley regions from north to south, providing better coverage of the state.
Missing or Implausible Values of PM2.5
# Summary of PM25 in the merged table.
summary(total$PM25)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0 4.5 7.6 10.1 12.3 302.5
# Stack 2002 with the unfiltered 2022 table so that negative readings are
# still present for inspection.
ttwo[, Year := 2022]
total1 <- rbind(two, ttwo)
total1 <- total1 %>%
  rename(PM25 = "Daily Mean PM2.5 Concentration")

# Parse the "mm/dd/YYYY" strings into Date values.
total1$Date <- as.Date(total1$Date, format = "%m/%d/%Y")

# Flag readings below 0 or above 100 as implausible.
implausible <- total1[total1$PM25 < 0 | total1$PM25 > 100]
dim(implausible)
[1] 254 23
# Row and column counts of the combined table, for comparison.
dim(total1)
[1] 75732 23
library(ggplot2)

# Scatter of the flagged readings over time, coloured by year.
# Each row is drawn once: layering geom_point() and geom_jitter() plots every
# observation twice (once in place, once randomly displaced).
# Year is numeric, so it is mapped as a factor to get two discrete colours
# rather than a continuous gradient.
implausible[!is.na(PM25) & !is.na(Date)] |>
  ggplot(mapping = aes(x = Date, y = PM25, color = factor(Year))) +
  geom_point() +
  labs(
    title = "Implausible values of PM2.5 in 2002 and 2022",
    color = "Year"
  )
# 2002 rows whose daily mean exceeds 100.
two100 <- two[two$`Daily Mean PM2.5 Concentration` > 100]
print(two100)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 11/29/2002 AQS 60290014 3 102.7 ug/m3 LC
2: 11/29/2002 AQS 60290014 4 104.3 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 184 Bakersfield-California 1 100
2: 185 Bakersfield-California 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
2: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
Method Description CBSA Code CBSA Name State FIPS Code
<char> <int> <char> <int>
1: Met-One BAM-1020 W/PM2.5 SCC 12540 Bakersfield, CA 6
2: Met-One BAM-1020 W/PM2.5 SCC 12540 Bakersfield, CA 6
State County FIPS Code County Site Latitude Site Longitude Year
<char> <int> <char> <num> <num> <num>
1: California 29 Kern 35.35661 -119.0626 2002
2: California 29 Kern 35.35661 -119.0626 2002
# 2022 rows whose daily mean is negative or exceeds 100.
ttwo100 <- ttwo[
  ttwo$`Daily Mean PM2.5 Concentration` < 0 |
    ttwo$`Daily Mean PM2.5 Concentration` > 100
]

# Sort by monitoring site. order() takes only the sort key here; no trailing
# empty argument.
ttwo100 <- ttwo100[order(`Site ID`)]
print(ttwo100)
Date Source Site ID POC Daily Mean PM2.5 Concentration Units
<char> <char> <int> <int> <num> <char>
1: 07/06/2022 AQS 60010011 3 -0.7 ug/m3 LC
2: 07/30/2022 AQS 60010011 3 -0.1 ug/m3 LC
3: 08/26/2022 AQS 60010011 3 -0.5 ug/m3 LC
4: 02/01/2022 AQS 60072002 3 -0.3 ug/m3 LC
5: 02/06/2022 AQS 60072002 3 -0.1 ug/m3 LC
---
248: 06/11/2022 AQS 61130004 3 -0.8 ug/m3 LC
249: 06/12/2022 AQS 61130004 3 -0.4 ug/m3 LC
250: 07/06/2022 AQS 61130004 3 -0.6 ug/m3 LC
251: 11/02/2022 AQS 61130004 3 -0.1 ug/m3 LC
252: 11/03/2022 AQS 61130004 3 -0.1 ug/m3 LC
Daily AQI Value Local Site Name Daily Obs Count Percent Complete
<int> <char> <int> <num>
1: 0 Oakland West 1 100
2: 0 Oakland West 1 100
3: 0 Oakland West 1 100
4: 0 Paradise - Theater 1 100
5: 0 Paradise - Theater 1 100
---
248: 0 Davis-UCD Campus 1 100
249: 0 Davis-UCD Campus 1 100
250: 0 Davis-UCD Campus 1 100
251: 0 Davis-UCD Campus 1 100
252: 0 Davis-UCD Campus 1 100
AQS Parameter Code AQS Parameter Description Method Code
<int> <char> <int>
1: 88101 PM2.5 - Local Conditions 170
2: 88101 PM2.5 - Local Conditions 170
3: 88101 PM2.5 - Local Conditions 170
4: 88502 Acceptable PM2.5 AQI & Speciation Mass 171
5: 88502 Acceptable PM2.5 AQI & Speciation Mass 171
---
248: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
249: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
250: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
251: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
252: 88502 Acceptable PM2.5 AQI & Speciation Mass 731
Method Description CBSA Code
<char> <int>
1: Met One BAM-1020 Mass Monitor w/VSCC 41860
2: Met One BAM-1020 Mass Monitor w/VSCC 41860
3: Met One BAM-1020 Mass Monitor w/VSCC 41860
4: Met-one BAM-1022 W/PM2.5 SCC 17020
5: Met-one BAM-1022 W/PM2.5 SCC 17020
---
248: Met-One BAM-1020 W/PM2.5 SCC 40900
249: Met-One BAM-1020 W/PM2.5 SCC 40900
250: Met-One BAM-1020 W/PM2.5 SCC 40900
251: Met-One BAM-1020 W/PM2.5 SCC 40900
252: Met-One BAM-1020 W/PM2.5 SCC 40900
CBSA Name State FIPS Code State
<char> <int> <char>
1: San Francisco-Oakland-Hayward, CA 6 California
2: San Francisco-Oakland-Hayward, CA 6 California
3: San Francisco-Oakland-Hayward, CA 6 California
4: Chico, CA 6 California
5: Chico, CA 6 California
---
248: Sacramento--Roseville--Arden-Arcade, CA 6 California
249: Sacramento--Roseville--Arden-Arcade, CA 6 California
250: Sacramento--Roseville--Arden-Arcade, CA 6 California
251: Sacramento--Roseville--Arden-Arcade, CA 6 California
252: Sacramento--Roseville--Arden-Arcade, CA 6 California
County FIPS Code County Site Latitude Site Longitude Year
<int> <char> <num> <num> <num>
1: 1 Alameda 37.81478 -122.2823 2022
2: 1 Alameda 37.81478 -122.2823 2022
3: 1 Alameda 37.81478 -122.2823 2022
4: 7 Butte 39.77919 -121.5914 2022
5: 7 Butte 39.77919 -121.5914 2022
---
248: 113 Yolo 38.53445 -121.7734 2022
249: 113 Yolo 38.53445 -121.7734 2022
250: 113 Yolo 38.53445 -121.7734 2022
251: 113 Yolo 38.53445 -121.7734 2022
252: 113 Yolo 38.53445 -121.7734 2022
# Number of flagged 2022 rows.
dim(ttwo100)
[1] 252 23
# Parse the "mm/dd/YYYY" strings so the dates can sit on a date axis.
two100$Date <- as.Date(two100$Date, format = "%m/%d/%Y")
ttwo100$Date <- as.Date(ttwo100$Date, format = "%m/%d/%Y")

# Flagged 2022 readings over time, coloured by site, with monthly axis ticks.
# A single geom_point() layer is used: adding geom_jitter() on top would draw
# every observation a second time at a randomly displaced position.
# `Site ID` is an integer column, so the colour scale is a continuous gradient.
ttwo100[!is.na(`Daily Mean PM2.5 Concentration`) & !is.na(Date)] |>
  ggplot(
    mapping = aes(
      x = Date,
      y = `Daily Mean PM2.5 Concentration`,
      color = `Site ID`
    )
  ) +
  geom_point() +
  labs(title = "Implausible values of PM2.5 in 2022") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
There are no missing values. As stated in part 1, I removed the PM2.5 values that were less than zero and found many values I thought may be too high (here, I define these as being greater than 100). The number of implausible PM2.5 values is 254, which makes up approximately 0.33% of the data. Adding these values back into the data set, we can look for temporal patterns in these observations. Looking at the plots above, we can see that there were only 2 PM2.5 values over 100 in 2002. Further investigation shows that both values were observed in Bakersfield, California on November 29, 2002. This could be an error or a rare observation. In 2022, however, we see many “implausible” values. Many different sites report values under 0 or over 100. It seems that negative PM2.5 readings occurred consistently throughout the year, whereas very high PM2.5 readings were more likely to occur from August to October.
Have Daily Concentrations of PM2.5 Decreased in California over the last 20 years?
# State ----
library(data.table)

# Treat Year as a category so each year gets its own x-axis position.
total[, Year := as.factor(Year)]

# Mean PM2.5 per year (point) with "mean_sdl" error bars.
total[!is.na(PM25) & !is.na(Year)] |>
  ggplot() +
  stat_summary(
    mapping = aes(x = Year, y = PM25),
    fun = mean,
    geom = "point"
  ) +
  stat_summary(
    mapping = aes(x = Year, y = PM25),
    fun.data = "mean_sdl",
    geom = "errorbar"
  ) +
  # Axis labels follow the aesthetic mapping: Year on x, PM2.5 on y.
  labs(
    title = "Average PM2.5 in California By Year",
    x = "Year",
    y = "PM2.5 (ug/m3)"
  )
# County ----
# Mean PM2.5 for every county/year pair.
county_avg <- total[
  ,
  .(PM25 = mean(PM25, na.rm = TRUE)),
  by = c("County", "Year")
]

# Side-by-side bars per county, one bar per year; county names are rotated
# 90 degrees on the x axis.
county_avg[!is.na(PM25) & !is.na(County) & !is.na(Year)] |>
  ggplot() +
  geom_col(
    mapping = aes(x = County, y = PM25, fill = Year),
    position = "dodge"
  ) +
  labs(
    title = "Average PM2.5 In Each County By Year",
    x = "County",
    y = "Average PM2.5 (ug/m3)"
  ) +
  scale_fill_brewer(palette = "Pastel2") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Sites in Los Angeles ----
# Restrict to Los Angeles County and treat the site identifier as a category.
sites <- total[total$`County` == "Los Angeles"]
sites[, `Site ID` := as.factor(`Site ID`)]

# Mean PM2.5 for every site/year pair.
sites_avg <- sites[
  ,
  .(PM25 = mean(PM25, na.rm = TRUE)),
  by = c("Site ID", "Year")
]

# Side-by-side bars per site, one bar per year; site IDs are rotated
# 90 degrees on the x axis.
sites_avg[!is.na(PM25) & !is.na(`Site ID`) & !is.na(Year)] |>
  ggplot() +
  geom_col(
    mapping = aes(x = `Site ID`, y = PM25, fill = Year),
    position = "dodge"
  ) +
  labs(
    title = "Average PM2.5 In Each Los Angeles Site By Year",
    x = "Monitoring Site ID",
    y = "Average PM2.5 (ug/m3)"
  ) +
  scale_fill_brewer(palette = "Pastel2") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Note: The graphs and data are produced after removing all the PM2.5 values less than zero.
When looking at the mean PM2.5 values for the whole state of California, we see that the mean is lower in 2022 than in 2002 (8.48 vs 16.12). However, this may partly reflect the larger number of monitoring sites in 2022: if the added sites tend to record lower values, the statewide mean would fall even without any change at the original sites. Thus, we can look at each county individually. From this, we can see that the average PM2.5 values were higher in 2002 than in 2022 for most of the counties with measurements in both years. Additionally, when looking at the monitoring sites just in Los Angeles, we see that for the sites that have measurements in both years, the PM2.5 values are lower in 2022 than they were in 2002. Thus, we can conclude that, on average, daily concentrations of PM2.5 have decreased in California over the last 20 years.